#!/usr/bin/env python
# encoding: utf-8
"""
#-------------------------------------------------------------------#
#                   CONFIDENTIAL --- CUSTOM STUDIOS                 #     
#-------------------------------------------------------------------#
#                                                                   #
#                   @Project Name : Globallawonline                #
#                                                                   #
#                   @File Name    : text.py                      #
#                                                                   #
#                   @Programmer   : 李建                            #
#                                                                   #  
#                   @Start Date   : 2021/3/2 16:46                 #
#                                                                   #
#                   @Last Update  : 2021/3/2 16:46                 #
#                                                                   #
#-------------------------------------------------------------------#
# Classes:                                                          #
#                                                                   #
#-------------------------------------------------------------------#
"""
import hashlib
import re

import pymysql

from DateProcessFunction.datefunction import Timefunction
from settings import LawNews_CJ_MYSQL, LawNews_Target_MYSQL


def deal_newsdata(datadict: dict):
    """
    将一条采集的新闻记录转为目标表的记录
    import: datadict 一条采集的新闻记录
    return: result_dict 一条目标表的新闻记录字典
    """
    fielddict = {"新闻标题": 'Title', "发表时间": 'PublishDate', "国别": 'SortA', "语言": 'SortB',
                 "新闻全文": 'Fulltexthtml', "原文网址": 'DownLoadUrl', "下载网站中文名称": 'DownLoadWebNameC',
                 "下载网站英文名称": 'DownLoadWebNameE', "网站类别": "Website"}
    result_dict = {}
    fieldlist = list(fielddict.keys())
    datalist = list(datadict.keys())
    for field in fieldlist:
        if field in datalist:
            if field == "发表时间":
                resultvaule = Timefunction().strpdatetime(datadict[field])
                if resultvaule == 555:
                    return False
                result_dict[fielddict[field]] = resultvaule
            elif field == "新闻全文":       # 对图片无格式的富文本添加style="width: 100%;格式
                image_regular = r'''<img.+?/>'''      # 匹配图片标签的正则表达式
                image_trs = re.findall(image_regular, datadict[field], re.S)
                for image_tr in image_trs:
                    if ("style" not in image_tr) and ("width" not in image_tr):
                        new_image_tr = image_tr.replace(r'''/>''', r''' style="width: 100%;"/>''')
                        datadict[field] = datadict[field].replace(image_tr, new_image_tr)
                result_dict[fielddict[field]] = datadict[field]
            else:
                result_dict[fielddict[field]] = datadict[field]
    result_dict["SYSID"] = str(hashlib.md5(datadict["原文网址"].encode('utf-8')).hexdigest())
    return result_dict


tablename = "textdata_333309479904870428"
downurl = "http://www.moj.gov.la/lo/%E0%BB%81%E0%BA%82%E0%BA%A7%E0%BA%87-%E0%BB%84%E0%BA%8A%E0%BA%AA%E0%BA%BB%E0%BA%A1%E0%BA%9A%E0%BA%B9%E0%BA%99/news/%E0%BA%81%E0%BA%AD%E0%BA%87%E0%BA%9B%E0%BA%B0%E0%BA%8A%E0%BA%B8%E0%BA%A1-%E0%BA%AA%E0%BA%B3%E0%BA%A1%E0%BA%B0%E0%BA%99%E0%BA%B2/%E0%BB%9C%E0%BB%88%E0%BA%A7%E0%BA%8D%E0%BA%8A%E0%BA%B2%E0%BA%A7%E0%BB%9C%E0%BA%B8%E0%BB%88%E0%BA%A1%E0%BA%9E%E0%BA%B0%E0%BB%81%E0%BA%99%E0%BA%81%E0%BA%8D%E0%BA%B8%E0%BA%95%E0%BA%B4%E0%BA%97%E0%BA%B3%E0%BB%81%E0%BA%82%E0%BA%A7%E0%BA%87-%E0%BB%84%E0%BA%8A%E0%BA%AA%E0%BA%BB%E0%BA%A1%E0%BA%9A%E0%BA%B9%E0%BA%99"
cjsqlconn = pymysql.connect(host=LawNews_CJ_MYSQL['MYSQL_HOST'], port=LawNews_CJ_MYSQL['MYSQL_PORT'],
                            db=LawNews_CJ_MYSQL['MYSQL_DB'], user=LawNews_CJ_MYSQL['MYSQL_USER'],
                            passwd=LawNews_CJ_MYSQL['MYSQL_PASSWD'], charset='utf8')
targetsqlconn = pymysql.connect(host=LawNews_Target_MYSQL['MYSQL_HOST'],
                                            port=LawNews_Target_MYSQL['MYSQL_PORT'],
                                            db=LawNews_Target_MYSQL['MYSQL_DB'],
                                            user=LawNews_Target_MYSQL['MYSQL_USER'],
                                            passwd=LawNews_Target_MYSQL['MYSQL_PASSWD'], charset='utf8')
cjsqlcorsor = cjsqlconn.cursor()  # 获取采集数据库游标
sql = "select COLUMN_NAME from information_schema.COLUMNS where table_name = '%s' and table_schema = '%s';" \
              % (tablename, LawNews_CJ_MYSQL['MYSQL_DB'])
cjsqlcorsor.execute(sql)
cjtablefiles = cjsqlcorsor.fetchall()
cjtablefilelist = []    # 采集表对应的字段列表
for cjtablefile in cjtablefiles:
    cjtablefilelist.append(cjtablefile[0])
sql = "select * FROM %s where 原文网址 = '%s'" % (tablename, downurl)
cjsqlcorsor.execute(sql)
newsdata = cjsqlcorsor.fetchall()
newsdatadict = {}
for i in range(len(cjtablefilelist)):
    newsdatadict[cjtablefilelist[i]] = newsdata[0][i]
targetdict = deal_newsdata(newsdatadict)
targetfilelist = list(targetdict.keys())
targetfilestr = ",".join(targetfilelist)
targetvaluelist = list(targetdict.values())
targetvalue = tuple(targetvaluelist)
# 向目标表中插入记录的语句列表
targetsql = '''insert into %s (%s) values %s''' % (
            LawNews_Target_MYSQL["MYSQL_TABLE"], targetfilestr, targetvalue)
targetsqlcorsor = targetsqlconn.cursor()
targetsqlcorsor.execute(targetsql)
targetsqlconn.commit()
pass